# -*- coding: utf-8 -*-
# Author        : Jie Shen
# CreateTime    : 2021/12/13 19:30
import os
import time
import urllib.request

import pandas as pd
from bs4 import BeautifulSoup


def send_request(url_):
    """Fetch *url_* and return the parsed HTML as a BeautifulSoup object.

    A browser-like User-Agent and a session Cookie header are attached so
    the server serves the page normally, and a short pause follows every
    request to throttle the crawl.
    """
    req = urllib.request.Request(url_)
    # Set request headers: mimic a real desktop browser.
    req.add_header('User-Agent',
                   'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36')  # set user-agent header
    req.add_header('Cookie',
                   'UM_distinctid=17db33cb78c52d-05296d0cb64114-978153c-144000-17db33cb78e48c; cityPy=wuhan; cityPy_expire=1639999515; CNZZDATA1275796416=1862267477-1639383037-|1639437166; Hm_lvt_ab6a683aa97a52202eab5b3a9042a8d2=1639394716,1639401030,1639442650,1639445300; Hm_lpvt_ab6a683aa97a52202eab5b3a9042a8d2=1639446119')
    # Use the response as a context manager so the underlying socket is
    # closed promptly instead of leaking until garbage collection.
    with urllib.request.urlopen(req) as response_:
        soup_ = BeautifulSoup(response_, "html.parser")
    # Be gentle on the server to avoid getting the IP banned.
    time.sleep(0.2)
    return soup_


def main_func():
    """Scrape monthly historical weather for Wuhan and save one CSV per month.

    Covers years 2011-2021 (2021 only through November, per the site's
    available data). Each file is written to csv/<YYYYMM>.csv with columns:
    date, daily high, daily low, weather, wind direction.
    """
    # URL pattern, e.g. .../wuhan/202002.html
    url = "https://lishi.tianqi.com/wuhan/{year}{month}.html"
    years = range(2011, 2022)
    months = range(1, 13)
    csv_header = ["日期","最高气温","最低气温","天气","风向"]
    # Ensure the output directory exists before writing any file.
    os.makedirs("csv", exist_ok=True)
    for y in years:
        for m in months:
            # 2021 data only runs through November; skip months with no page.
            if y == 2021 and m > 11:
                continue
            url_ = url.format(year=y, month=str(m).zfill(2))
            print(url_)
            soup = send_request(url_)
            # The history table is the first <ul class="thrui">; each <li>
            # is one day's record whose <div> cells hold the field values.
            ul_tag = soup.find_all("ul", class_="thrui")[0]
            info = [[d.string for d in li.find_all("div")]
                    for li in ul_tag.find_all("li")]
            filename = "csv/" + str(y) + str(m).zfill(2) + ".csv"
            pd.DataFrame(info).to_csv(filename, index=False, header=csv_header)


if __name__ == '__main__':
    # Run the scraper only when executed as a script, not when imported.
    main_func()
